import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
# Look at the chapter on interactive graphics and, specifically, the code to
# display a subject's MRICloud data as a sunburst plot. Do the following:
# display this subject's data as a Sankey diagram. Display as many levels as
# you can for type = 1, starting from the intracranial volume. Put this in a
# file called hw4.ipynb.
## Load the multilevel ROI hierarchy lookup table
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
# Read the tab-separated table, discard the Level5 column, give the
# "modify*" columns the names used by the rest of the script, and keep
# only the ROI name plus its four parent levels (finest to coarsest).
multilevel_lookup = (
    pd.read_csv(url, sep="\t")
    .drop(columns=["Level5"])
    .rename(
        columns={
            "modify": "roi",
            "modify.1": "level4",
            "modify.2": "level3",
            "modify.3": "level2",
            "modify.4": "level1",
        }
    )
    .loc[:, ["roi", "level4", "level3", "level2", "level1"]]
)
## Load one subject's volumes and attach the ROI hierarchy
# NOTE: `id` shadows the builtin id(); the name is kept because it is a
# module-level variable that other cells may read.
id = 127
subjectData = (
    pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
    # Keep only the type-1, level-5 rows belonging to the chosen subject.
    .loc[lambda df: (df["type"] == 1) & (df["level"] == 5) & (df["id"] == id)]
    [["roi", "volume"]]
    # Inner join on the ROI name adds the level4..level1 parent columns.
    .merge(multilevel_lookup, on="roi")
    # Constant root column so every ROI hangs off a single top-level node.
    .assign(icv="ICV")
)
# Express each ROI volume as a fraction of the subject's total volume.
subjectData["comp"] = subjectData["volume"] / subjectData["volume"].sum()
## Build the Sankey node list (`labels`) and link table (`results`)

# Total composition at each depth of the hierarchy. The named `.sum()`
# reduction is used instead of passing the builtin `sum` to `.agg`, which
# newer pandas versions deprecate.
level1sums = subjectData.groupby(['icv', 'level1'], as_index=False)['comp'].sum()
level2sums = subjectData.groupby(['icv', 'level1', 'level2'], as_index=False)['comp'].sum()
level3sums = subjectData.groupby(['icv', 'level1', 'level2', 'level3'], as_index=False)['comp'].sum()

# Join the three summaries into one wide table. After the merges the value
# columns are: comp_x (level1 total), comp_y (level2 total), comp (level3
# total); the duplicated key columns produced by the merges are dropped.
# NOTE(review): the joins key on the level name alone, which assumes each
# level1/level2 name has a single parent - confirm against the lookup table.
sankey_dat = pd.merge(level1sums, level2sums, on='level1')
sankey_dat = sankey_dat.drop(columns='icv_y')
sankey_dat = pd.merge(sankey_dat, level3sums, on='level2')
sankey_dat = sankey_dat.drop(columns=['icv', 'level1_y'])

# Node labels in display order: root, then level1, level2, level3 names.
# Nodes are keyed by (level, name) so that a name occurring at more than one
# level still resolves to the node of the intended level; a plain
# `labels.index(name)` would always return the first (shallowest) match.
# The dict also replaces a linear list search per row with an O(1) lookup.
labels = []
_node_index = {}
for _level in ('icv', 'level1', 'level2', 'level3'):
    for _name in subjectData[_level].unique():
        _node_index[(_level, _name)] = len(labels)
        labels.append(_name)

# Translate names into node positions; the root ("ICV") is always node 0.
sankey_dat['base_idx'] = 0
sankey_dat['level1_idx'] = [_node_index[('level1', n)] for n in sankey_dat['level1_x']]
sankey_dat['level2_idx'] = [_node_index[('level2', n)] for n in sankey_dat['level2']]
sankey_dat['level3_idx'] = [_node_index[('level3', n)] for n in sankey_dat['level3']]

# One row per distinct (source, target) pair; the value is constant within a
# pair because it came from the per-level sums, so `first()` just picks it.
indices1 = (sankey_dat.loc[:, ['base_idx', 'level1_idx', 'comp_x']]
            .groupby(['base_idx', 'level1_idx'], as_index=False)['comp_x'].first())
indices2 = (sankey_dat.loc[:, ['level1_idx', 'level2_idx', 'comp_y']]
            .groupby(['level1_idx', 'level2_idx'], as_index=False)['comp_y'].first())
indices3 = (sankey_dat.loc[:, ['level2_idx', 'level3_idx', 'comp']]
            .groupby(['level2_idx', 'level3_idx'], as_index=False)['comp'].first())

# Stack all links into a single array with columns: source, target, value.
results = np.vstack((indices1.to_numpy(), indices2.to_numpy(), indices3.to_numpy()))
## Draw the Sankey diagram from the node labels and the link table
# Columns of `results` are read as: 0 = source node, 1 = target node,
# 2 = link value.
sankey_trace = go.Sankey(
    node={
        "pad": 15,
        "thickness": 20,
        "line": {"color": "black", "width": 0.5},
        "label": labels,
        "color": "orange",
    },
    link={
        "source": results[:, 0],
        "target": results[:, 1],
        "value": results[:, 2],
    },
)
fig = go.Figure(data=[sankey_trace])
fig.update_layout(
    title_text="Sankey Diagram of Brain Composition to Three Levels",
    font_size=10,
)
fig.show()
# Create an interactive scatter plot of the average number of opioid pills by
# year using plotly. See the example here. Don't do the intervals (little
# vertical lines), only the points. Add your plot to an HTML file in the same
# repo as your Sankey diagram and host it publicly. Put a link to your hosted
# file in a markdown cell of your hw4.ipynb file. Note: an easy way to create
# a webpage with this graphic is to export an ipynb as an HTML file.
import sqlite3 as sq3

## Read the population, annual and land tables from the local opioid database
# Raw string: "\s" and "\o" are not valid escape sequences, so the non-raw
# literal triggers an invalid-escape warning on current Python versions.
# The resulting path is byte-for-byte the same.
con = sq3.connect(r"C:\sqlite\opioid.db")
try:
    population = pd.read_sql_query("SELECT * from population", con)
    annual = pd.read_sql_query("SELECT * from annual", con)
    land = pd.read_sql_query("SELECT * from land", con)
finally:
    # Release the database handle even if one of the queries fails.
    con.close()
## Summarise pills per year and draw the interactive scatter plot

# Set the county FIPS code for Montgomery County, AR rows.
# NOTE(review): presumably these rows arrive without a FIPS code - confirm.
annual.loc[(annual['BUYER_STATE'] == 'AR') &
           (annual['BUYER_COUNTY'] == 'MONTGOMERY'), 'countyfips'] = '05097'

# County-level reference table: population joined with land-area columns.
# It is not used by the plot below but is kept as a module-level name.
land_area = (land.loc[:, ['Areaname', 'STCOU', 'LND110210D']]
             .rename(columns={'STCOU': 'countyfips'}))
county_info = population.merge(land_area, how='left', on='countyfips')

# Rescale dosage units to millions and average over all rows of each year.
annual['Pills_in_millions'] = annual['DOSAGE_UNIT'].astype(float) / 1000000
avg_by_year = annual.groupby('year', as_index=False)['Pills_in_millions'].mean()

# Bind the figure and show it explicitly: a bare `px.scatter(...)` expression
# only renders as the last expression of a notebook cell and is silently
# discarded when the code runs as a script.
opioid_fig = px.scatter(
    avg_by_year, x="year", y="Pills_in_millions",
    labels=dict(year="Year", Pills_in_millions="Average # of Pills (in millions)"),
    title="Average Number of Opioid Pills Shipped to a US County Over Time")
opioid_fig.show()